/* Master do-file: collapses each ASI dataset to the district level (asicode88) and builds a district-level dataset.
Part 1 creates a dataset of all factories (urban & rural), while Part 2 creates a dataset with only rural factories.
Note: Labor regs: In Karnataka, unlike all other states, labor regulation changed once between 1987-93.
Note on ASI years: the REFERENCE YEAR for ASI 2001-2002 is the accounting year of the factory ending on 31st March, 2002 (ASI Documentation).
Unbalanced panel of around 330 districts. */


* ---- Session settings ----
* clear comes first so that no data are in memory when the memory setting is changed
clear
set more off
set mem 400m
set matsize 6000


* ---- Project folders currently in use ----
global data "C:\Users\Siddharth\Desktop\factories\data"
global rain "C:\Users\Siddharth\Desktop\factories\data\rainfall"
global do   "C:\Users\Siddharth\Desktop\factories\do"


* ---- Alternate folder layout (disabled; swap with the set above to use it) ----
/* 
global log		"C:\research\factories\log"
global do		"C:\research\factories\do"
global rain		"C:\research\factories\data\rainfall"
global data		"C:\research\factories\data"
global output 	"C:\research\factories\output"
*/

* ---- Factory-level variables that get weighted and summed to district totals ----
global varlist  "factories closingvalueland fixedcapitalop fixedcapitalcl landrental totalrent workers workers_2 mandays workerswages totaloutput depreciation electricity fuel materials valueadded profits"

* ---- Rainfall inputs ----
* step 1: build the rain data from the Delaware raw precipitation files
do "$do\gen_rain.do"

* step 2: attach rain to ASI district codes via the closest rainfall grid point and generate the rainfall shocks
do "$do\merge_rain.do"

* District-code crosswalk keyed on asicode99. It is used further down to map the
* 1993-94 factory data (coded in asicode99) onto asicode88 districts and to build
* the state-code crosswalk; both uses need only asicode99 and asicode88.
* The source file appears to hold one row per code91 district (it is merged on
* code91 and then averaged up to asicode88 in the rainfall step), so the same
* asicode99-asicode88 pair can occur on several rows. Left in, those repeats make
* the old-style merge on asicode99 carry each district's collapsed totals onto
* every repeated row, and the following collapse (sum) by asicode88 then counts
* them more than once. Keeping one row per pair prevents that; it changes nothing
* if the pairs are already unique.
use "$data\asi_50_55_43_code91.dta", clear
keep asicode99 asicode88
duplicates drop
sort asicode99
tempfile codes
save `codes'

* Rainfall shocks at the asicode88-by-year level.
* Start from the district-code file, attach the wide rainfall-shock file on code91,
* and keep only districts found in both.
use "$data\asi_50_55_43_code91.dta", clear
sort code91
merge code91 using "$rain\district_rain_wide_code91.dta"
keep if _m == 3
drop _m  bsr96_district bsr00_district asi_district_name asi_state_ut asicode99 distt_id50 distt_id43 distt_id55 code91  longitude latitude glon glat
* average the shocks over all code91 rows that share an asicode88, then go long in year
collapse (mean) shock30* shock50* shockdev* shockperc* shockctsdev* shockpctile* shocknorm* shockhigh* shocklow*, by(asicode88)
reshape long shock30 shock50 shockdev shockperc shockctsdev  shockpctile shocknorm shockhigh shocklow, i(asicode88) j(year)
* some non-integer shockpctile values (a by-product of the averaging): recode by sign to -1 / 1
replace shockpctile = -1 if shockpctile < 0
* Stata ranks missing above every number, so "shockpctile > 0" alone would also
* turn missing shocks into 1; the extra condition leaves missing values missing
replace shockpctile = 1 if shockpctile > 0 & shockpctile < .
drop if asicode88 == .
sort asicode88 year
tempfile rain_asicode88
save `rain_asicode88'

***************************************
*Part 1: All factories(rural and urban)
*This is our main dataset
***************************************

* ASI 1987-88: factory records -> deflated, multiplier-scaled district totals
* the processing do-file leaves the cleaned factory data in memory
do "$do\process88_sid.do"
keep asicode88 price88 multiplier $varlist

* observations without a price index receive the overall mean of price88
tempvar avgprice
egen `avgprice' = mean(price88)
replace price88 = `avgprice' if price88 == .
drop `avgprice'

* deflate the value variables by price88
foreach v in closingvalueland fixedcapitalop fixedcapitalcl landrental totalrent workerswages totaloutput depreciation electricity fuel materials valueadded profits {
    replace `v' = `v'/price88
}

* scale every variable in the global varlist by multiplier
* (presumably the ASI sampling weight; confirm in process88_sid.do)
foreach v in $varlist {
    replace `v' = `v'* multiplier
}

* district totals, tagged with the panel year
collapse (sum) $varlist, by(asicode88)
generate year = 1987
tempfile dist88
save `dist88'

* ASI 1990-91: factory records -> deflated, multiplier-scaled district totals
do "$do\process91_sid.do"
keep asicode88 price91 multiplier $varlist

* missing price index: use the overall mean of price91
tempvar avgprice
egen `avgprice' = mean(price91)
replace price91 = `avgprice' if price91 == .
drop `avgprice'

* deflate the value variables by price91
foreach v in closingvalueland fixedcapitalop fixedcapitalcl landrental totalrent workerswages totaloutput depreciation electricity fuel materials valueadded profits {
    replace `v' = `v'/price91
}

* scale every variable in the global varlist by multiplier
* (presumably the ASI sampling weight; confirm in process91_sid.do)
foreach v in $varlist {
    replace `v' = `v'* multiplier
}

* district totals, tagged with the panel year
collapse (sum) $varlist, by(asicode88)
generate year = 1990
tempfile dist91
save `dist91'

* ASI 1993-94: factory records -> deflated, multiplier-scaled district totals.
* This round is coded in asicode99, so totals are first built by asicode99 and
* then mapped to asicode88 through the codes crosswalk.
do "$do\process94 - ach.do"
keep asicode99 price94 multiplier $varlist

* missing price index: use the overall mean of price94
tempvar avgprice
egen `avgprice' = mean(price94)
replace price94 = `avgprice' if price94 == .
drop `avgprice'

* deflate the value variables by price94
foreach v in closingvalueland fixedcapitalop fixedcapitalcl landrental totalrent workerswages totaloutput depreciation electricity fuel materials valueadded profits {
    replace `v' = `v'/price94
}

* scale every variable in the global varlist by multiplier
* (presumably the ASI sampling weight; confirm in the 1993-94 processing do-file)
foreach v in $varlist {
    replace `v' = `v'* multiplier
}

* totals by asicode99, then translate to asicode88 and total again
* NOTE(review): this relies on the codes crosswalk having one row per
* asicode99-asicode88 pair; repeated rows would replicate the totals before the
* second collapse. TODO confirm
collapse (sum) $varlist, by(asicode99)
sort asicode99
merge asicode99 using `codes'
keep if _merge == 3
drop _merge
collapse (sum) $varlist, by(asicode88)
generate year = 1993
tempfile dist94
save `dist94'


* Crosswalk between the two state-code versions.
* Each state code is the district code with its last two digits removed.
* The result is saved as the tempfile scodes; the merge that used it in the
* labor regulation step further down is currently commented out.
use `codes', clear
generate statecode   = int(asicode99/100)
generate statecode88 = int(asicode88/100)
keep statecode88 statecode
* one row per distinct pair, and only pairs with a known 1988 state code
duplicates drop
drop if missing(statecode88)
sort statecode
tempfile scodes
save `scodes'


* Labor regulation data, sorted on statecode88 and year for the merge further down.
* The path is quoted like every other file reference in this do-file so that a
* data folder containing spaces does not break the command.
use "$data\labregrainfall.dta", clear
* Disabled: translating statecode to statecode88 through the state-code crosswalk.
* The file is sorted on statecode88 directly below, so it evidently carries that
* variable already.
*sort statecode
*merge statecode using `scodes'
*keep if _m == 3
*drop _m
sort statecode88 year
tempfile lab_temp
save `lab_temp'


* Stack the three district-year files, then attach rainfall shocks and labor regulation

* 1987 file first, then 1990 and 1993 appended in that order
use `dist88', clear
foreach yr in 91 94 {
    append using `dist`yr''
}

* rainfall shocks: keep district-years present on both sides
sort asicode88 year
merge asicode88 year using `rain_asicode88'

tabulate _merge
drop if _merge != 3
drop _merge

* 1988 state code = district code without its last two digits
generate statecode88 = int(asicode88/100)

* labor regulation variables by state and year: keep matched observations only
local labvars "statecode88 year nstrict  prow proe APcode APDS Bhatt EPLprow DSproe DSprow Bhattproe Bhattprow"
sort statecode88 year
merge statecode88 year using `lab_temp', keep(`labvars')

tabulate _merge
drop if _merge != 3
drop _merge
tempfile asi
save `asi', replace

* Agricultural yields
* yield is constant (1985) price-weighted index of district outputs of rice, wheat, maize, jowar, bajra and barley, divided by their cultivated area.

* agr_yield.do leaves the yield data in memory; store it for the merge
* NOTE(review): the old-style merge below needs that data sorted on asicode88 year;
* presumably agr_yield.do does this. TODO confirm
do "$do\agr_yield.do"
tempfile yield879093_asicode88
save `yield879093_asicode88', replace

* attach yields to the factory panel; observations from either side are all kept
use `asi', clear
sort asicode88 year
merge asicode88 year using `yield879093_asicode88'
drop _merge
sort asicode88
tempfile asi2
save `asi2', replace 

* Agrarian shares in 1987 from NSS Unemployment data, merged onto the panel

use "$data\nss435055_asicode88.dta", clear
keep n1_r_88 n4_r_88 n11_r_94  asicode88

* each share is a count divided by n11_r_94, expressed in percent
generate landed88 = (n1_r_88/n11_r_94)*100
label variable landed88 " % of working age who were self-employed in farm HH enterprise in 1988 "

generate landless88 = (n4_r_88/n11_r_94)*100
label variable landless88 "% of working age who were wage/casual workers in agr sector in 1988 "

generate agrarian88 = ((n1_r_88 + n4_r_88)/n11_r_94)*100
label variable agrarian88 "% of working age who were employed in agr in 1988"

* the raw counts are no longer needed
drop n1_r_88 n4_r_88 n11_r_94

* attach the district-year panel; observations from either side are all kept
sort asicode88
merge asicode88 using `asi2'
drop _merge

sort asicode88
tempfile asi3
save `asi3', replace 


* Initial k/o ratio and percent employment in food industries

* asi87_ratios.do leaves its data in memory; the panel is merged onto it by district
* NOTE(review): the old-style merge needs that data sorted on asicode88;
* presumably asi87_ratios.do does this. TODO confirm
do "$do\asi87_ratios.do"
merge asicode88 using `asi3'

drop _merge

* Missing initial conditions are replaced with the mean of the same variable
* within agrstate (the state average)
foreach v in landed88 landless88 agrarian88 food88 kqratio88 {
    tempvar statemean
    egen `statemean' = mean(`v'), by(agrstate)
    replace `v' = `statemean' if `v' == .
    drop `statemean'
}


* Log transforms: each new variable is the source name prefixed with l
foreach v in yield factories workers workers_2 totaloutput valueadded {
    generate l`v' = ln(`v')
}

* one dummy per year
tabulate year, generate(ydum)

* proe is proemployer state, prow is proworker state
* (disabled: both now come in with the labor regulation merge)
*ge proe = nstrict < 0
*ge prow = nstrict > 0
*replace prow = . if nstrict == .


* state-by-year identifier and one dummy per state-year
* NOTE(review): statecode is not created anywhere in the active part of this file;
* presumably it arrives with one of the merged datasets. TODO confirm
generate syr = year*100 + statecode
tabulate syr, generate(sydum)

* Interaction terms. Three families are built, in this order:
*   regulation x shock
*   initial condition x shock, each followed by regulation x initial condition x shock
*   regulation x initial condition
local shocks "shockctsdev shocknorm shockpctile shockdev shockhigh shocklow"
local regs   "nstrict prow proe APcode APDS Bhatt EPLprow DSproe DSprow Bhattproe Bhattprow"
local inits  "landed88 landless88 agrarian88 kqratio88 food88"

* regulation x shock
foreach s of local shocks {
    foreach r of local regs {
        generate `r'_`s' = `r'*`s'
    }
}

* initial condition x shock, plus the triple interaction with each regulation measure
foreach s of local shocks {
    foreach c of local inits {
        generate `c'_`s' = `c'*`s'
        foreach r of local regs {
            generate `r'_`c'_`s' = `r'*`c'*`s'
        }
    }
}

* regulation x initial condition
foreach c of local inits {
    foreach r of local regs {
        generate `r'_`c' = `r'*`c'
    }
}

/* Balanced panel: dropping districts which do not appear in all three years

drop if workers== .
egen x=seq(), by(asicode88)
egen y= max(x), by(asicode88)
keep if y == 3
drop y
*/

* write the Part 1 district panel (all factories)
saveold "$data\district889194.dta", replace 

/* district ids of balanced panel
keep if year == 1987
keep asicode88
ge balanced = 1
sort asicode88
saveold "$data\balanced_code.dta", replace */

***********************************************************************
*PART 2: a dataset comprising only rural factories in every district
*NOTE: the Part 2 code below is wrapped in a block comment, so it is currently disabled and does not run
***********************************************************************

* ASI 1987-88
* clean factory data
/* do "$do\process88_sid.do"
keep if ruralurbancode == 1
keep asicode88 price88 multiplier $varlist
egen x = mean(price88)
replace price88 = x if price88 == .
drop x
* deflating
local l "closingvalueland fixedcapitalop fixedcapitalcl landrental totalrent  workerswages totaloutput depreciation  electricity  fuel materials    valueadded profits"
foreach v of local l{
replace `v' = `v'/price88
}
*multiplying
foreach v of global varlist{
replace `v' = `v'* multiplier
}
collapse (sum) $varlist, by (asicode88)
ge year = 1987
tempfile rdist88
save `rdist88'

* ASI 1990-91
do "$do\process91_sid.do"
keep if ruralurbancode == 1

keep asicode88 price91  multiplier $varlist
*missing price index
egen x = mean(price91)
replace price91 = x if price91 == .
drop x
* deflating
local l "closingvalueland fixedcapitalop fixedcapitalcl landrental totalrent workerswages totaloutput depreciation  electricity  fuel materials valueadded profits"
foreach v of local l{
replace `v' = `v'/price91
}
*multiplying
foreach v of global varlist{
replace `v' = `v'* multiplier
}
collapse (sum) $varlist, by (asicode88)
ge year = 1990
tempfile rdist91
save `rdist91'

* ASI 1993-94
do "$do\process94 - ach.do"
keep if ruralurbancode == 1

keep asicode99 price94 multiplier $varlist
*missing price index
egen x = mean(price94)
replace price94 = x if price94 == .
drop x
* deflating
local l "closingvalueland fixedcapitalop fixedcapitalcl landrental totalrent  workerswages totaloutput depreciation  electricity  fuel materials    valueadded profits"
foreach v of local l{
replace `v' = `v'/price94
}
*multiplying
foreach v of global varlist{
replace `v' = `v'* multiplier
}
collapse (sum) $varlist, by (asicode99)
sort asicode99
merge asicode99 using `codes'
keep if _m == 3
drop _m
collapse (sum) $varlist, by(asicode88)
ge year = 1993
tempfile rdist94
save `rdist94'


*matching different version of statecodes
u `codes', clear
ge statecode88 =  int(asicode88/100)
ge statecode =  int(asicode99/100)
keep statecode88 statecode
duplicates drop
drop if statecode88 == .
sort statecode
tempfile scodes
save `scodes'


* sort labor regulation data
use $data\laborreg.dta, clear
sort statecode
merge statecode using `scodes'
keep if _m == 3
drop _m
sort statecode88 year
tempfile lab_temp
save `lab_temp'


* append factory data and merge with rain data

use `rdist88', clear
append using `rdist91'
append using `rdist94'

sort asicode88 year
merge asicode88 year using `rain_asicode88'

tab _merge
keep if _merge ==3
drop _merge

ge statecode88 =  int(asicode88/100)

sort statecode88 year
merge statecode88 year using `lab_temp', keep(statecode88 year nstrict)

tab _merge
keep if _merge==3
drop _merge	
tempfile asi
save `asi', replace

* merging agricultural yields
* yield is constant (1985) price-weighted index of district outputs of rice, wheat, maize, jowar, bajra and barley, divided by their cultivated area.

* Agricultural yields data 
do "$do\agr_yield.do"
tempfile yield879093_asicode88
save `yield879093_asicode88', replace

use `asi'
sort asicode88 year
merge asicode88 year using `yield879093_asicode88'
drop _m
sort asicode88
tempfile asi2
save `asi2', replace 

* Agrarian in 1987 from NSS Unemployment data.

use "$data\nss435055_asicode88.dta", clear

keep n1_r_88 n4_r_88 n11_r_94  asicode88

ge landed88 = (n1_r_88/n11_r_94)*100
ge landless88 = (n4_r_88/n11_r_94)*100
ge agrarian88 = ((n1_r_88 + n4_r_88)/n11_r_94)*100

drop n1_r_88 n4_r_88 n11_r_94  


label var landed88 " % of working age who were self-employed in farm HH enterprise in 1988 "
label var landless88 "% of working age who were wage/casual workers in agr sector in 1988 "
label var agrarian88 "% of working age who were employed in agr in 1988"                                                 

sort asicode88 
merge asicode88 using `asi2'
drop _m


sort asicode88
tempfile asi3
save `asi3', replace 


*merging initial k/o ratio and percent employment in food industries

do "$do\asi87_ratios.do"
merge asicode88 using `asi3'

drop _m

* filling in missing with state average

local l "landed88 landless88 agrarian88 food88 kqratio88"
foreach v of local l{
egen m`v' = mean(`v'), by(agrstate)
replace `v' = m`v' if `v' == .
drop m`v'
}


* logs 
local l " yield factories workers workers_2 totaloutput valueadded"
foreach v of local l{
ge l`v' = log(`v')
} 

tab year, ge(ydum)

* proe is proemployer state, prow is proworker state
ge proe = nstrict < 0
ge prow = nstrict > 0
replace prow = . if nstrict == .


ge syr = year*100 + statecode
tab syr, ge(sydum)

* interactions
local l "shockctsdev shocknorm shockpctile shockdev shockhigh shocklow"
foreach v of local l{
ge proe_`v' = proe*`v'
ge prow_`v' = prow*`v'
ge nstrict_`v' = nstrict*`v'
}


local l "shockctsdev shocknorm shockpctile shockdev shockhigh shocklow"
foreach v of local l{
local h "landed88 landless88 agrarian88 kqratio88 food88"
foreach w of local h{
ge `w'_`v' = `w'*`v'
ge proe_`w'_`v' = proe*`w'*`v'
ge prow_`w'_`v' = prow*`w'*`v'
ge nstrict_`w'_`v' = nstrict*`w'*`v'
}
}

local h "landed88 landless88 agrarian88 kqratio88 food88"
foreach w of local h{
ge proe_`w' = proe*`w'
ge prow_`w' = prow*`w'
}

/* Balanced panel: dropping districts which do not appear in all three years

drop if workers== .
egen x=seq(), by(asicode88)
egen y= max(x), by(asicode88)
keep if y == 3
drop y
*/

saveold "$data\rural_district889194.dta", replace */




